package com.novel.crawl.bxwx9.spider.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that treats every page as one of two kinds:
 * <ul>
 *   <li>a book detail page (request URL contains {@code "binfo"}), from which
 *       a fixed set of fields is extracted via XPath;</li>
 *   <li>any other page, from which follow-up links are queued and no fields
 *       are produced.</li>
 * </ul>
 *
 * @author zhilong.deng@hand-china.com
 * @date 2018/11/1
 * @version 1.0
 */
public class BookProcessor implements PageProcessor {

    /** URL fragment whose presence marks a request as a book detail page. */
    private static final String BOOK_INFO = "binfo";

    /** Crawl settings returned by {@link #getSite()}. */
    private final Site site = Site.me()
            .setCharset("UTF-8")
            .setRetryTimes(3)
            .setCycleRetryTimes(3)
            .setSleepTime(1000);

    /**
     * Dispatches the page to detail extraction or link discovery depending on
     * whether its request URL contains {@link #BOOK_INFO}.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        boolean isBookDetail = page.getRequest().getUrl().contains(BOOK_INFO);
        if (!isBookDetail) {
            queueFollowUpLinks(page);
            return;
        }
        extractBookFields(page);
    }

    /**
     * Returns the crawl settings configured for this processor.
     *
     * @return the shared {@link Site} instance
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Queues the links found on a non-detail page and marks the page as
     * skipped, since no fields are extracted from it.
     */
    private static void queueFollowUpLinks(Page page) {
        // Link to the next listing page.
        queueLinks(page, "//a[@class='next']//@href");
        // Links to every book listed on this page.
        queueLinks(page, "//td[@class='odd']//a//@href");
        page.setSkip(true);
    }

    /** Stores each book attribute of a detail page under its result field name. */
    private static void extractBookFields(Page page) {
        // Book title
        putXpathField(page, "name", "//tbody//tr[1]/td[1]//font//text()");
        // Category
        putXpathField(page, "type", "//tbody//tr[1]/td[1]//font//a[2]//text()");
        // Cover image
        putXpathField(page, "image", "//*[@id='centerm']/table[1]/tbody/tr/td/table[3]/tbody/tr/td/table[1]/tbody/tr/td[1]/table/tbody/tr/td/a/img//@src");
        // Author
        putXpathField(page, "author", "//*[@id='centerm']/table[1]/tbody/tr/td/table[3]/tbody/tr/td/table[1]/tbody/tr/td[2]/table/tbody/tr[1]/td[4]/a//text()");
        // Link
        putXpathField(page, "url", "//*[@id='centerm']/table[1]/tbody/tr/td/table[3]/tbody/tr/td/table[2]/tbody/tr/td/table/tbody/tr[5]/td/a[2]//@href");
        // Description
        putXpathField(page, "desc", "//*[@id='centerm']/table[1]/tbody/tr/td/table[3]/tbody/tr/td/table[2]/tbody/tr/td/table/tbody/tr[2]/td[1]/div/text()");
        // Status
        putXpathField(page, "status", "//*[@id='centerm']/table[1]/tbody/tr/td/table[3]/tbody/tr/td/table[1]/tbody/tr/td[2]/table/tbody/tr[2]/td[6]/text()");
    }

    /** Evaluates {@code xpath} against the page HTML and stores the selection under {@code field}. */
    private static void putXpathField(Page page, String field, String xpath) {
        page.putField(field, page.getHtml().xpath(xpath));
    }

    /** Adds every value selected by {@code xpath} as a target request of the page. */
    private static void queueLinks(Page page, String xpath) {
        page.addTargetRequests(page.getHtml().xpath(xpath).all());
    }
}
